This notebook walks through an exploratory regression analysis using a simple, point-based hedonic house price model for Baltimore.


In [1]:
import pysal as ps
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_context('talk')
%matplotlib inline



In [2]:
ps.examples.available()


Out[2]:
['baltim',
 'juvenile',
 'taz',
 'networks',
 'arcgis',
 'desmith',
 'virginia',
 'nat',
 'sacramento2',
 'Polygon',
 'south',
 'Point',
 'book',
 '10740',
 'chicago',
 'newHaven',
 'sids2',
 'mexico',
 'Line',
 'wmat',
 'street_net_pts',
 'us_income',
 'columbus',
 'burkitt',
 'geodanet',
 'stl',
 'snow_maps',
 'calemp']

In [3]:
ps.examples.explain('baltim')


Out[3]:
{'description': 'Baltimore house sales prices and hedonics',
 'explanation': ['* baltim.dbf attribute data',
  '* baltim.shp shape file',
  '* baltim.shx spatial index file',
  '* baltim.tri.k12.kwt Kernel weights using a triangular kernel with 12 nearest',
  '  neighbors',
  '* baltim_k4.gwt Nearest neighbor weights (4nn)',
  '* baltim_q.gal Queen contiguity file',
  '* baltimore.geojson',
  'Point data, n=211, k= 17.'],
 'name': 'baltim'}

In [4]:
data = ps.pdio.read_files(ps.examples.get_path('baltim'))

In [5]:
data.head()


Out[5]:
STATION PRICE NROOM DWELL NBATH PATIO FIREPL AC BMENT NSTOR GAR AGE CITCOU LOTSZ SQFT X Y geometry
0 1 47.0 4 0 1.0 0 0 0 2 3 0 148 0 5.70 11.25 907 534 (907.0, 534.0)
1 2 113.0 7 1 2.5 1 1 1 2 2 2 9 1 279.51 28.92 922 574 (922.0, 574.0)
2 3 165.0 7 1 2.5 1 1 0 3 2 2 23 1 70.64 30.62 920 581 (920.0, 581.0)
3 4 104.3 7 1 2.5 1 1 1 2 2 2 5 1 174.63 26.12 923 578 (923.0, 578.0)
4 5 62.5 7 1 1.5 1 1 0 2 2 0 19 1 107.80 22.04 918 574 (918.0, 574.0)

In [6]:
mindist = ps.min_threshold_dist_from_shapefile(ps.examples.get_path('baltim.shp'))
mindist


Out[6]:
21.319005605327842
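min_threshold_dist_from_shapefile returns the smallest distance band at which every observation has at least one neighbor, i.e. the largest nearest-neighbor distance among the points. A minimal cross-check of that quantity, assuming scipy is available (this sketch was not part of the original run):

from scipy.spatial import cKDTree

pts = np.array([data.X.values, data.Y.values]).T
tree = cKDTree(pts)
# k=2 because the closest hit is the point itself; column 1 is the true nearest neighbor
d, _ = tree.query(pts, k=2)
print(d[:, 1].max())  # should agree with mindist up to floating-point noise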

In [7]:
W = ps.threshold_binaryW_from_array(np.array([data.X.values, data.Y.values]).T, 2*mindist)

In [8]:
W = ps.W(W.neighbors, W.weights)
W.transform = 'r'
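The distance band is set to twice that minimum, so no observation is left as an island, and the row-standardization (transform = 'r') rescales each row of weights to sum to one, which turns a spatial lag into a neighborhood average. Two quick sanity checks, a sketch using the W attributes this pysal version exposes:

# every point should have at least one neighbor at this threshold ...
print(min(W.cardinalities.values()))
print(max(W.cardinalities.values()))
# ... and every row-standardized weight vector should sum to one
assert all(abs(sum(wts) - 1.0) < 1e-9 for wts in W.weights.values())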

In [9]:
ycols = ['PRICE']
xcols = ['NROOM', 'DWELL', 'LOTSZ', 'SQFT']  # 'AGE' and the remaining hedonics are added below
y = data[ycols].values
X = data[xcols].values

In [10]:
ols_reg = ps.spreg.OLS(y, X, w=W, spat_diag=True, moran=True,
                       name_y=ycols[0], name_x=xcols)

In [11]:
print(ols_reg.summary)


REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :     unknown
Dependent Variable  :       PRICE                Number of Observations:         211
Mean dependent var  :     44.3072                Number of Variables   :           5
S.D. dependent var  :     23.6061                Degrees of Freedom    :         206
R-squared           :      0.5026
Adjusted R-squared  :      0.4929
Sum squared residual:   58207.850                F-statistic           :     52.0363
Sigma-square        :     282.562                Prob(F-statistic)     :   3.048e-30
S.E. of regression  :      16.810                Log likelihood        :    -892.297
Sigma-square ML     :     275.867                Akaike info criterion :    1794.595
S.E of regression ML:     16.6092                Schwarz criterion     :    1811.354

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT       6.7704906       5.5586180       1.2180169       0.2246108
               NROOM       3.0124753       1.3568591       2.2201828       0.0274963
               DWELL      11.0148021       2.8169537       3.9101822       0.0001252
               LOTSZ       0.1238061       0.0199574       6.2035243       0.0000000
                SQFT       0.4277341       0.2023011       2.1143443       0.0356884
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER           16.124

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2         544.545           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                4          82.085           0.0000
Koenker-Bassett test              4          17.638           0.0015

DIAGNOSTICS FOR SPATIAL DEPENDENCE
TEST                           MI/DF       VALUE           PROB
Moran's I (error)              0.0287         5.646           0.0000
Lagrange Multiplier (lag)         1          31.726           0.0000
Robust LM (lag)                   1          22.882           0.0000
Lagrange Multiplier (error)       1           9.303           0.0023
Robust LM (error)                 1           0.458           0.4985
Lagrange Multiplier (SARMA)       2          32.184           0.0000

================================ END OF REPORT =====================================
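The spatial diagnostics are the part worth dwelling on: the robust LM (lag) statistic is large and significant while the robust LM (error) statistic is not (p ≈ 0.50), which under the usual Anselin decision rule points to a spatial lag specification rather than a spatial error one. That motivates the ML_Lag fit below. With spat_diag=True the same statistics are stored on the fitted object (attribute names as exposed by this version of spreg; each is a (statistic, p-value) pair):

print(ols_reg.lm_lag)     # LM test, lag alternative
print(ols_reg.rlm_lag)    # robust variant
print(ols_reg.lm_error)   # LM test, error alternative
print(ols_reg.rlm_error)  # robust variant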

In [12]:
effects, errs = ols_reg.betas, ols_reg.std_err

In [13]:
plt.title('Regression Effects Plot')
plt.axis([-1, 5, -12, 30])
# point estimates with +/- 2 standard-error bars
plt.errorbar(range(len(effects.flatten())), effects.flatten(),
             yerr=errs.flatten()*2, fmt='.k', ecolor='r', capthick=1)
plt.hlines(0, -1, 5, linestyle='--', color='k')


Out[13]:
<matplotlib.collections.LineCollection at 0x7f9ce2829890>
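The error bars span two standard errors on either side of each estimate, so any bar crossing the dashed zero line is roughly indistinguishable from zero at the 5% level. The same intervals can be printed directly (pure numpy, a sketch):

# approximate 95% intervals: beta +/- 2 * SE
ci = np.c_[effects.flatten() - 2*errs.flatten(),
           effects.flatten() + 2*errs.flatten()]
print(ci)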

In [14]:
resids = y - ols_reg.predy

In [15]:
Mresids = ps.Moran(resids.flatten(), W)
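ps.Moran computes Moran's I for the residuals along with a permutation-based pseudo p-value; a positive, significant I on residuals signals spatial structure the regression has not absorbed. The attributes used in the plots below (as named in this pysal version):

print(Mresids.I)      # observed statistic
print(Mresids.EI)     # expectation under the null, -1/(n-1)
print(Mresids.p_sim)  # pseudo p-value from random permutations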

In [16]:
fig, ax = plt.subplots(1,3,figsize=(12*1.6,6))
for xi, yi, res in zip(data.X.values, data.Y.values, resids.flatten()):
    if res + ols_reg.std_y < 0:
        color = 'r'
    elif res - ols_reg.std_y > 0:
        color = 'b'
    else:
        color = 'k'
    # transparency tracks residual magnitude; rescale so alpha stays in [0, 1]
    ax[0].plot(xi, yi, color=color, marker='o',
               alpha=min(np.abs(res) / np.abs(resids).max(), 1.0))
ax[0].axis([850, 1000, 500, 590])
ax[0].text(x=860, y=580, s='$I = %.3f (%.2f)$' % (Mresids.I, Mresids.p_sim))


ax[1].plot(ols_reg.predy, resids, 'o')
ax[1].axis([15,110,-60,120])
ax[1].hlines(0,0,150, linestyle='--', color='k')
ax[1].set_xlabel('Prediction')
ax[1].set_ylabel('Residuals')

# hat matrix H = X (X'X)^{-1} X'; its diagonal gives each observation's leverage
H = np.dot(X, np.linalg.inv(np.dot(X.T, X)))
H = np.dot(H, X.T)

lev = H.diagonal().reshape(-1, 1)

ax[2].plot(lev, resids, '.k')
ax[2].hlines(0,0,.2,linestyle='--', color='k')
ax[2].set_xlabel('Leverage')
ax[2].set_ylabel('Residuals')
ax[2].legend(labels=['Residuals'])

ax[0].set_axis_bgcolor('white')
ax[0].set_xticks([])
ax[0].set_yticks([])

ax[0].set_title('Spatial Error in House Price Prediction')
ax[1].set_title('Residuals vs. Prediction')
ax[2].set_title('Residuals vs. Leverage')


plt.show()
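One caveat on the leverage panel: spreg adds the intercept internally, while the hat matrix above is built from X without a constant column. For leverages that match the fitted model exactly, one would prepend a column of ones, as in this sketch (not what was run above):

# hat matrix including the intercept column that spreg adds internally
Xc = np.hstack([np.ones((X.shape[0], 1)), X])
Hc = Xc.dot(np.linalg.inv(Xc.T.dot(Xc))).dot(Xc.T)
lev_c = Hc.diagonal().reshape(-1, 1)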



In [17]:
ml_lag = ps.spreg.ML_Lag(y, X, w=W, name_y=ycols[0], name_x=xcols)
effects, errs = ml_lag.betas, ml_lag.std_err



In [18]:
print(ml_lag.summary)


REGRESSION
----------
SUMMARY OF OUTPUT: MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = FULL)
-----------------------------------------------------------------
Data set            :     unknown
Weights matrix      :     unknown
Dependent Variable  :       PRICE                Number of Observations:         211
Mean dependent var  :     44.3072                Number of Variables   :           6
S.D. dependent var  :     23.6061                Degrees of Freedom    :         205
Pseudo R-squared    :      0.5469
Spatial Pseudo R-squared:  0.5443
Sigma-square ML     :     251.400                Log likelihood        :    -883.789
S.E of regression   :      15.856                Akaike info criterion :    1779.578
                                                 Schwarz criterion     :    1799.689

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     z-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT     -21.8565350       7.0039291      -3.1206106       0.0018048
               NROOM       2.3380132       1.2815163       1.8244115       0.0680899
               DWELL      10.1102253       2.6587786       3.8025827       0.0001432
               LOTSZ       0.1137310       0.0188473       6.0343349       0.0000000
                SQFT       0.3476010       0.1911961       1.8180340       0.0690589
           W_dep_var       0.8190865       0.1122342       7.2980102       0.0000000
------------------------------------------------------------------------------------
================================ END OF REPORT =====================================
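The coefficient on W_dep_var (rho ≈ 0.82) is large and strongly significant: neighboring sale prices move together even after conditioning on the four hedonics. Under the lag model a shock to one house feeds back through its neighbors, and a crude sense of that amplification comes from the stored coefficient (attribute name per this version of spreg):

print(ml_lag.rho)                # spatial autoregressive coefficient
print(1.0 / (1.0 - ml_lag.rho))  # naive global multiplier implied by the lag model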

In [19]:
plt.title('Regression Effects Plot')
plt.axis([-1, 5, -38, 20])
plt.errorbar(range(len(effects.flatten())), effects.flatten(),
             yerr=errs.flatten()*2, fmt='.k', ecolor='r', capthick=1)
plt.hlines(0, -1, 5, linestyle='--', color='k')


Out[19]:
<matplotlib.collections.LineCollection at 0x7f9ce1f002d0>

In [20]:
resids = y - ml_lag.predy
Mresids = ps.Moran(resids.flatten(), W)

In [21]:
fig, ax = plt.subplots(1,3,figsize=(12*1.6,6))
for xi, yi, res in zip(data.X.values, data.Y.values, resids.flatten()):
    if res + ols_reg.std_y < 0:
        color = 'r'
    elif res - ols_reg.std_y > 0:
        color = 'b'
    else:
        color = 'k'
    # transparency tracks residual magnitude; rescale so alpha stays in [0, 1]
    ax[0].plot(xi, yi, color=color, marker='o',
               alpha=min(np.abs(res) / np.abs(resids).max(), 1.0))
ax[0].axis([850, 1000, 500, 590])
ax[0].text(x=860, y=580, s='$I = %.3f (%.2f)$' % (Mresids.I, Mresids.p_sim))



ax[1].plot(ml_lag.predy, resids, 'o')
ax[1].axis([15,110,-60,120])
ax[1].hlines(0,0,150, linestyle='--', color='k')
ax[1].set_xlabel('Prediction')
ax[1].set_ylabel('Residuals')

XtXi = np.linalg.inv(np.dot(X.T, X))
H = np.dot(X, XtXi)
H = np.dot(H, X.T)

lev = H.diagonal().reshape(-1,1)

ax[2].plot(lev, resids, '.k')
ax[2].hlines(0,0,.25,linestyle='--', color='k')
ax[2].set_xlabel('Tangential Leverage')
ax[2].set_ylabel('Residuals')
ax[2].axis([-.01,.2,-60,120])

ax[0].set_axis_bgcolor('white')
ax[0].set_xticks([])
ax[0].set_yticks([])

ax[0].set_title('Spatial Error in House Price Prediction')
ax[1].set_title('Residuals vs. Prediction')
ax[2].set_title('Residuals vs. Tangential Leverage')


plt.show()



In [22]:
xcols.append('AGE')

In [23]:
X = data[xcols].values

In [24]:
reg_ommit = ps.spreg.OLS(y, X, name_y=ycols[0], name_x=xcols)
effects, errs = reg_ommit.betas, reg_ommit.std_err
print(reg_ommit.summary)


REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :        None
Dependent Variable  :       PRICE                Number of Observations:         211
Mean dependent var  :     44.3072                Number of Variables   :           6
S.D. dependent var  :     23.6061                Degrees of Freedom    :         205
R-squared           :      0.5585
Adjusted R-squared  :      0.5478
Sum squared residual:   51659.953                F-statistic           :     51.8746
Sigma-square        :     252.000                Prob(F-statistic)     :   1.339e-34
S.E. of regression  :      15.875                Log likelihood        :    -879.707
Sigma-square ML     :     244.834                Akaike info criterion :    1771.414
S.E of regression ML:     15.6472                Schwarz criterion     :    1791.526

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT      14.6489181       5.4722022       2.6769695       0.0080306
               NROOM       3.1432333       1.2816359       2.4525165       0.0150221
               DWELL       8.4680298       2.7067609       3.1284735       0.0020130
               LOTSZ       0.1164694       0.0189021       6.1617341       0.0000000
                SQFT       0.5602420       0.1928078       2.9057021       0.0040667
                 AGE      -0.2936768       0.0576128      -5.0974247       0.0000008
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER           17.420

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2         825.062           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                5         112.426           0.0000
Koenker-Bassett test              5          20.611           0.0010
================================ END OF REPORT =====================================
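Adding AGE improves the fit noticeably (R-squared rises from 0.503 to 0.559 and AIC falls from 1794.6 to 1771.4), and the other coefficients shift, the classic signature of omitted-variable bias in the smaller model. The comparison can be made programmatically, assuming the aic attribute this version of spreg exposes:

print(ols_reg.aic)    # four-variable model
print(reg_ommit.aic)  # lower AIC favors the model with AGE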

In [25]:
plt.title('Regression Effects Plot')
plt.axis([-1, 6, -5, 28])
plt.errorbar(range(len(effects.flatten())), effects.flatten(),
             yerr=errs.flatten()*2, fmt='.k', ecolor='r', capthick=1)
plt.hlines(0, -1, 6, linestyle='--', color='k')


Out[25]:
<matplotlib.collections.LineCollection at 0x7f9ce2a28b10>

In [26]:
resids = y - reg_ommit.predy
Mresids = ps.Moran(resids.flatten(), W)

In [27]:
fig, ax = plt.subplots(1,3,figsize=(12*1.6,6))
for xi, yi, res in zip(data.X.values, data.Y.values, resids.flatten()):
    if res + ols_reg.std_y < 0:
        color = 'r'
    elif res - ols_reg.std_y > 0:
        color = 'b'
    else:
        color = 'k'
    # transparency tracks residual magnitude; rescale so alpha stays in [0, 1]
    ax[0].plot(xi, yi, color=color, marker='o',
               alpha=min(np.abs(res) / np.abs(resids).max(), 1.0))
ax[0].axis([850, 1000, 500, 590])
ax[0].text(x=860, y=580, s='$I = %.3f (%.2f)$' % (Mresids.I, Mresids.p_sim))



ax[1].plot(reg_ommit.predy, resids, 'o')
ax[1].axis([15,110,-60,120])
ax[1].hlines(0,0,150, linestyle='--', color='k')
ax[1].set_xlabel('Prediction')
ax[1].set_ylabel('Residuals')

H = np.dot(X, np.linalg.inv(np.dot(X.T, X)))
H = np.dot(H, X.T)

lev = H.diagonal().reshape(-1,1)

ax[2].plot(lev, resids, '.k')
ax[2].hlines(0,0,.25,linestyle='--', color='k')
ax[2].set_xlabel('Leverage')
ax[2].set_ylabel('Residuals')
ax[2].legend(labels=['Residuals'])

ax[0].set_axis_bgcolor('white')
ax[0].set_xticks([])
ax[0].set_yticks([])

ax[0].set_title('Spatial Error in House Price Prediction')
ax[1].set_title('Residuals vs. Prediction')
ax[2].set_title('Residuals vs. Leverage')


plt.show()



In [28]:
xcols.extend(['NBATH', 'PATIO', 'FIREPL', 'AC', 'BMENT', 'NSTOR', 'GAR', ])
X = data[xcols].values
reg_ommit = ps.spreg.OLS(y, X, name_y=ycols[0], name_x=xcols)
effects, errs = reg_ommit.betas, reg_ommit.std_err
resids = y - reg_ommit.predy
print(reg_ommit.summary)


REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :        None
Dependent Variable  :       PRICE                Number of Observations:         211
Mean dependent var  :     44.3072                Number of Variables   :          13
S.D. dependent var  :     23.6061                Degrees of Freedom    :         198
R-squared           :      0.6946
Adjusted R-squared  :      0.6761
Sum squared residual:   35734.671                F-statistic           :     37.5332
Sigma-square        :     180.478                Prob(F-statistic)     :   1.512e-44
S.E. of regression  :      13.434                Log likelihood        :    -840.824
Sigma-square ML     :     169.359                Akaike info criterion :    1707.648
S.E of regression ML:     13.0138                Schwarz criterion     :    1751.222

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT      21.2418739       6.5908741       3.2229221       0.0014839
               NROOM       0.0750767       1.1641658       0.0644897       0.9486454
               DWELL       9.5932682       2.6479575       3.6228936       0.0003702
               LOTSZ       0.0493600       0.0181753       2.7157748       0.0071968
                SQFT       0.4100902       0.2349199       1.7456594       0.0824213
                 AGE      -0.1343020       0.0560587      -2.3957414       0.0175179
               NBATH       4.1482796       1.9921287       2.0823351       0.0385969
               PATIO      10.0855860       2.9546155       3.4135020       0.0007777
              FIREPL      10.0361943       2.6181713       3.8332840       0.0001698
                  AC       9.0893190       2.5765338       3.5277313       0.0005209
               BMENT       3.0383161       1.0769640       2.8211863       0.0052721
               NSTOR      -4.6304236       3.0734067      -1.5066095       0.1335045
                 GAR       5.7597950       1.8543511       3.1060973       0.0021740
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER           28.304

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2         393.569           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test               12         190.278           0.0000
Koenker-Bassett test             12          46.141           0.0000
================================ END OF REPORT =====================================
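With the full set of hedonic controls, NROOM collapses to insignificance and SQFT is only marginal, while the amenity dummies (PATIO, FIREPL, AC, GAR) carry most of the explanatory weight. A rough significance screen over the table (pure numpy, a sketch):

# flag coefficients whose |t| exceeds 2 (an informal 5% screen)
tvals = reg_ommit.betas.flatten() / reg_ommit.std_err.flatten()
for name, t in zip(['CONSTANT'] + xcols, tvals):
    print('%10s %7.2f %s' % (name, t, '*' if abs(t) > 2 else ''))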

In [29]:
plt.title('Regression Effects Plot')
plt.axis([-1, 13, -12, 35])
plt.errorbar(range(len(effects.flatten())), effects.flatten(),
             yerr=errs.flatten()*2, fmt='.k', ecolor='r', capthick=1)
plt.hlines(0, -1, 13, linestyle='--', color='k', linewidth=.9)


Out[29]:
<matplotlib.collections.LineCollection at 0x7f9ce2053990>

In [30]:
Mresids = ps.Moran(resids.flatten(), W)

In [31]:
fig, ax = plt.subplots(1,3,figsize=(12*1.6,6))
for xi, yi, res in zip(data.X.values, data.Y.values, resids.flatten()):
    if res + ols_reg.std_y < 0:
        color = 'r'
    elif res - ols_reg.std_y > 0:
        color = 'b'
    else:
        color = 'k'
    # transparency tracks residual magnitude; rescale so alpha stays in [0, 1]
    ax[0].plot(xi, yi, color=color, marker='o',
               alpha=min(np.abs(res) / np.abs(resids).max(), 1.0))
ax[0].axis([850, 1000, 500, 590])
ax[0].text(x=860, y=580, s='$I = %.3f (%.2f)$' % (Mresids.I, Mresids.p_sim))


ax[1].plot(reg_ommit.predy, resids, 'o')
ax[1].axis([15,110,-60,120])
ax[1].hlines(0,0,150, linestyle='--', color='k')
ax[1].set_xlabel('Prediction')
ax[1].set_ylabel('Residuals')


H = np.dot(X, np.linalg.inv(np.dot(X.T, X)))
H = np.dot(H, X.T)

lev = H.diagonal().reshape(-1,1)

ax[2].plot(lev, resids, '.k')
ax[2].hlines(0,0,.25,linestyle='--', color='k')
ax[2].set_xlabel('Leverage')
ax[2].set_ylabel('Residuals')
ax[2].legend(labels=['Residuals'])

ax[0].set_axis_bgcolor('white')
ax[0].set_xticks([])
ax[0].set_yticks([])

ax[0].set_title('Spatial Error in House Price Prediction')
ax[1].set_title('Residuals vs. Prediction')
ax[2].set_title('Residuals vs. Leverage')


plt.show()



In [32]:
reg_ommit = ps.spreg.ML_Lag(y, X, w=W, name_y=ycols[0], name_x=xcols)
effects, errs = reg_ommit.betas, reg_ommit.std_err
resids = y - reg_ommit.predy
print(reg_ommit.summary)


REGRESSION
----------
SUMMARY OF OUTPUT: MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = FULL)
-----------------------------------------------------------------
Data set            :     unknown
Weights matrix      :     unknown
Dependent Variable  :       PRICE                Number of Observations:         211
Mean dependent var  :     44.3072                Number of Variables   :          14
S.D. dependent var  :     23.6061                Degrees of Freedom    :         197
Pseudo R-squared    :      0.7002
Spatial Pseudo R-squared:  0.7023
Sigma-square ML     :     166.248                Log likelihood        :    -839.044
S.E of regression   :      12.894                Akaike info criterion :    1706.088
                                                 Schwarz criterion     :    1753.014

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     z-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT       8.2254368       9.3915042       0.8758381       0.3811181
               NROOM      -0.0461611       1.1184407      -0.0412727       0.9670785
               DWELL       8.9756469       2.5580360       3.5088039       0.0004501
               LOTSZ       0.0474538       0.0174714       2.7160802       0.0066060
                SQFT       0.3975868       0.2256318       1.7621043       0.0780517
                 AGE      -0.1280838       0.0539477      -2.3742224       0.0175860
               NBATH       3.6580590       1.9240915       1.9011877       0.0572774
               PATIO       9.8213381       2.8379330       3.4607364       0.0005387
              FIREPL       9.7028519       2.5199025       3.8504870       0.0001179
                  AC       7.9896554       2.5221661       3.1677752       0.0015361
               BMENT       2.8412072       1.0376538       2.7381070       0.0061794
               NSTOR      -4.9663093       2.9534240      -1.6815429       0.0926575
                 GAR       5.7060098       1.7798192       3.2059491       0.0013462
           W_dep_var       0.3859748       0.1944353       1.9851065       0.0471326
------------------------------------------------------------------------------------
================================ END OF REPORT =====================================
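With the fuller covariate set, rho drops from roughly 0.82 to 0.39 and is only marginally significant (p ≈ 0.047): much of the apparent spatial dependence in the sparse model was omitted hedonic structure rather than genuine spillover. The two stored coefficients make the comparison direct:

print(ml_lag.rho)     # sparse-model rho
print(reg_ommit.rho)  # full-model rho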

In [33]:
plt.title('Regression Effects Plot')
plt.axis([-1, 14, -10, 20])
plt.errorbar(range(len(effects.flatten())), effects.flatten(),
             yerr=errs.flatten()*2, fmt='.k', ecolor='r', capthick=1)
plt.hlines(0, -1, 14, linestyle='--', color='k')


Out[33]:
<matplotlib.collections.LineCollection at 0x7f9cdbfa0990>

In [34]:
Mresids = ps.Moran(resids.flatten(), W)

In [35]:
fig, ax = plt.subplots(1,3,figsize=(12*1.6,6))
for xi, yi, res in zip(data.X.values, data.Y.values, resids.flatten()):
    if res + ols_reg.std_y < 0:
        color = 'r'
    elif res - ols_reg.std_y > 0:
        color = 'b'
    else:
        color = 'k'
    # transparency tracks residual magnitude; rescale so alpha stays in [0, 1]
    ax[0].plot(xi, yi, color=color, marker='o',
               alpha=min(np.abs(res) / np.abs(resids).max(), 1.0))
ax[0].axis([850, 1000, 500, 590])
ax[0].text(x=860, y=580, s='$I = %.3f (%.2f)$' % (Mresids.I, Mresids.p_sim))


ax[1].plot(reg_ommit.predy, resids, 'o')
ax[1].axis([15,110,-60,120])
ax[1].hlines(0,0,150, linestyle='--', color='k')
ax[1].set_xlabel('Prediction')
ax[1].set_ylabel('Residuals')


H = np.dot(X, np.linalg.inv(np.dot(X.T, X)))
H = np.dot(H, X.T)

lev = H.diagonal().reshape(-1,1)

ax[2].plot(lev, resids, '.k')
ax[2].hlines(0,0,.25,linestyle='--', color='k')
ax[2].set_xlabel('Tangential Leverage')
ax[2].set_ylabel('Residuals')

ax[0].set_axis_bgcolor('white')
ax[0].set_xticks([])
ax[0].set_yticks([])

ax[0].set_title('Spatial Error in House Price Prediction')
ax[1].set_title('Residuals vs. Prediction')
ax[2].set_title('Residuals vs. Tangential Leverage')


plt.show()


